# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------


  library(tidyverse)
  library(xtable)
  options(xtable.comment = FALSE)
  source("fun/fun_electbeleifs_MLE.R")
  source("fun/fun_utilities.R")

  

  source("00_DataCleaner_symmetric.R") # Richard Code to clean Data
  dat_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric.R") # Richard Code to clean Data
  dat_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_symmetric_variance.R") # Richard Code to clean Data
  dat_sym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric_variance.R") # Richard Code to clean Data
  dat_asym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_sym.R") # Richard Code to clean Data
  dat_nach_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_asym.R") # Richard Code to clean Data
  dat_nach_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

# Estimation  ============

  res <- list() # Result List

#+ Quantile, echo=FALSE, include=FALSE
## Quantile Question  ============

  # Estimate Model for Quantile Question Symmetric, Low Variance
  res[["sym_lvar_quant"]] <- est_split_cov(dat_sym, tv=c(60,60))
    
  # Estimate Model for Quantile Question Asymmetric, Low Variance
  res[["asym_lvar_quant"]]  <- est_split_cov(dat_asym, tv=c(60,30)) 

  # Estimate Model for Quantile Question Symmetric, High Variance
  res[["sym_hvar_quant"]] <-  est_split_cov(dat_sym_var, tv=c(30,30)) 

  # Estimate Model for Quantile Question Asymmetric Variance, High Variance
  res[["asym_hvar_quant"]] <- est_split_cov(dat_asym_var, tv= c(30,15))

#+ Quantile2, echo=FALSE, include=FALSE
## Quantile Question (without correction)  ============

  # Estimate Model for Quantile Question Symmetric, Low Variance
  # res[["sym_lvar_qtnsc"]] <- dat_nach_sym %>%
  #   creat_dat_Y(method="QuantileNoCorrect") %>%
  #   est_eB_beta(true.val = c(60,60))
  # 
  # # Estimate Model for Quantile Question Asymmetric, Low Variance
  # res[["asym_lvar_qtnsc"]]  <- dat_nach_asym %>%
  #   creat_dat_Y(method="Quantile") %>%
  #   est_eB_beta(true.val = c(60,30))

## Intervall Question (Wide) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intwi"]]  <- est_split_cov(dat_sym, m ="Interval",
                                            tv=c(60,60), co = c(0.40,0.60))
  
  # Estimate Model for Quantile Question Aaymmetric, low Variance
  res[["asym_lvar_intwi"]]  <- est_split_cov(dat_asym, m ="Interval",
                                             tv=c(60,30), co = c(0.40,0.60))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intwi"]]  <- est_split_cov(dat_sym_var, m ="Interval",
                                            tv=c(30,30), co = c(0.40,0.60))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intwi"]]  <- est_split_cov(dat_asym_var, m ="Interval",
                                             tv=c(30,15), co = c(0.40,0.60))


#+ Intervall2, echo=FALSE, include=FALSE
## Intervall Question (Narrow) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intna"]]  <- est_split_cov(dat_sym, m ="Interval2",
                                            tv=c(60,60), co = c(0.45,0.55))
  
  # Estimate Model for Quantile Question Aaymmetric, low Variance
  res[["asym_lvar_intna"]]  <- est_split_cov(dat_asym, m ="Interval2",
                                             tv=c(60,30), co = c(0.45,0.55))
  
  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intna"]]  <- est_split_cov(dat_sym_var, m ="Interval2",
                                            tv=c(30,30), co = c(0.45,0.55))
  
  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intna"]]  <- est_split_cov(dat_asym_var, m ="Interval2",
                                             tv=c(30,15), co = c(0.45,0.55))

#+ Manski, echo=FALSE, include=TRUE
## Manski Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_manski"]]  <- est_split_cov(dat_sym, m="Manski", tv=c(60,60)) 

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_lvar_manski"]]  <- est_split_cov(dat_asym, m="Manski", tv=c(60,30))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_hvar_manski"]]  <- est_split_cov(dat_sym_var, m="Manski", tv=c(30,30))

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_hvar_manski"]]  <- est_split_cov(dat_asym_var, m="Manski", tv=c(30,15))

#+ BinsBalls, echo=FALSE, include=FALSE
## Bins and Balls Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_binba"]]  <- est_split_cov(dat_sym,m="BinsBalls",tv= c(60,60))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["asym_lvar_binba"]]  <- est_split_cov(dat_asym,m="BinsBalls",tv= c(60,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["sym_hvar_binba"]]  <- est_split_cov(dat_sym_var,m="BinsBalls",tv= c(30,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["asym_hvar_binba"]]  <- est_split_cov(dat_asym_var,m="BinsBalls",tv= c(30,15))

#+ Figure, echo=FALSE, include=TRUE, fig.cap = "Comparsion of average elecited beliefs and true data distribution"
## Figure Average Estimate  ============

  # Get estimates
  mdl_nam <- names(res)
  cov_nam <- names(res[[1]])
  
  par_est <- expand_grid(mdl_nam,cov_nam) %>%
            rowwise() %>%
            mutate(alpha = res[[mdl_nam]][[cov_nam]][["par"]][1],
                   beta = res[[mdl_nam]][[cov_nam]][["par"]][2],
                   N  = res[[mdl_nam]][[cov_nam]][["N"]])

  
  # Name estimates  & create Factors
  par_est <- par_est %>%
    separate("mdl_nam",c("type","var","method")) %>%
    separate("cov_nam",c("cov","name")) %>%
    mutate(type = factor(type, levels = c("sym","asym"), labels = c("Symmetric","Asymetric")),
           var = factor(var, levels = c("lvar","hvar"),labels = c("Small Variance","Large Variance")),
           method = factor(method, levels=c("quant","intwi","intna","manski","binba"),
                           labels = c("Quantile","Interval (Wide)",
                                              "Interval (Narrow)", "Manski",
                                              "Bins and Balls")))

  # Expand Data.frame
  expand.grid.df <- function(...) Reduce(function(...) merge(..., by=NULL), list(...))
  df_est <- expand.grid.df(data.frame("x"=seq(0.2,0.8,0.01)), par_est)

  # Prepare Data for Plot
  df_est       <- mutate(df_est, db = dbeta(x,alpha,beta))

  # Plot Data Education
  ggplot(filter(df_est,cov=="Education")) +
    geom_line(aes(x=x,y=db,linetype=name)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                             colour = "grey96",
                             size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )

  #ggsave("out/fig/Fig_ElecitedBeliefs_MLE_Average_Education.pdf",
   #      width = 12,height = 12)

  # Plot Data Education
  ggplot(filter(df_est,cov=="PolInt")) +
    geom_line(aes(x=x,y=db,linetype=name)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                                          colour = "grey96",
                                          size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  ggsave("out/fig/Fig_A4.pdf",
         width = 12,height = 12)
  
  # Plot Data Education
  ggplot(filter(df_est,cov=="Age")) +
    geom_line(aes(x=x,y=db,linetype=name)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                                          colour = "grey96",
                                          size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  #ggsave("out/fig/Fig_ElecitedBeliefs_MLE_Average_Age.pdf",
   #      width = 12,height = 12)
  
  # Plot Data Education
  ggplot(filter(df_est,cov=="Gender")) +
    geom_line(aes(x=x,y=db,linetype=name)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                                          colour = "grey96",
                                          size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
#  ggsave("out/fig/Fig_ElecitedBeliefs_MLE_Average_Gender.pdf",
#         width = 12,height = 12)
  

## Estimates Table  ============
  
  # True Values
  df_true <- data.frame(expand.grid("type"=c("Symmetric","Asymetric"),
                                    "var"=c("Large Variance","Small Variance")) %>%
                          mutate(alpha_true = c(30,30,60,60),
                                 beta_true = c(30,15,60,30)))
  
  # Filter for Information and University
  par_est_sub <- filter(par_est, cov %in% c("Education","PolInt"))
  
  # Prepare Table for symmetric Results
  est_tab <- left_join(par_est_sub, df_true)  %>%
    rowwise() %>%
    mutate(KL = KL_dis_beta(c(alpha_true,beta_true), as.numeric(c(alpha,beta)))) %>%
    select(c("method","type","cov","name","var","alpha","beta","KL"))
  
  ls_est <- split(est_tab, list(est_tab$type, est_tab$var, est_tab$cov))
  nam_l <- c("A9_c","A9_d","A9_a","A9_b")
  catcher <- matrix(NA, 5,8)
  for(nam in names(ls_est)){
    dat_tab <-  ls_est[[nam]] %>%
      gather(variable, value, -(method:var)) %>%
      unite(tmp, name, variable)%>%
      spread(tmp, value) %>%
      select(-type, -cov,   -var)
    types <- strsplit(nam,"[.]")[[1]]
    text <- paste("Estimates for", types[1],"Distribution with",types[2],sep=" ")
    if (which(nam == names(ls_est))>4){
      scena <- which(nam == names(ls_est)[5:8])
      catcher[,2*(scena-1)+1] <- c(as.numeric(as.matrix(dat_tab[,4])))
      catcher[,2*(scena-1)+2] <- c(as.numeric(as.matrix(dat_tab[,7])))
   
    
    
    print(xtable(dat_tab, caption=text), floating = FALSE,booktabs = T, include.rownames=FALSE,
          #file=paste("out/fig/Tab_ElecitedBeliefs_Covar_",str_replace(nam," ",""),"_MLE.tex",sep=""))
          file=paste("out/fig/Tab_",str_replace(nam_l[scena]," ",""),".tex",sep=""))
  }
  }
  
  
  average.table <- dat_tab[,c(1,4,7)]

  average.table[,2] <- rowSums(catcher[,c(1,3,5,7)])/4
  average.table[,3] <- rowSums(catcher[,c(2,4,6,8)])/4
  
  
  print(xtable(average.table, caption=text), floating = FALSE,booktabs = T, include.rownames=FALSE,
        file="out/fig/Tab_A9_e.tex",sep="")
  
  
  
  